get all the imports to a requirements.txt file

run on shell:

grep -r import * > imports.txt


In [18]:
import re

import numpy as np
import pandas as pd

In [12]:
with open("imports.txt") as fp:
    raw = fp.readlines()
df = pd.Series(raw)

In [13]:
len(df)


Out[13]:
20597891

In [14]:
df.head()


Out[14]:
0                             "import pymc as pm\n",\n
1                            "import numpy as np\n",\n
2        "import matplotlib.pyplot as plt, seaborn ...
3        "Oh, and if I was really giving too much a...
4                            "import numpy as np\n",\n
dtype: object

In [15]:
df[0]


Out[15]:
'    "import pymc as pm\\n",\n'

In [16]:
df = df.str.strip()

In [17]:
df.head()


Out[17]:
0                               "import pymc as pm\n",
1                              "import numpy as np\n",
2    "import matplotlib.pyplot as plt, seaborn as s...
3    "Oh, and if I was really giving too much advic...
4                              "import numpy as np\n",
dtype: object

In [20]:
re.findall(r'import\ {.*}\ ', df[0])


Out[20]:
[]

In [27]:
df[0:10].apply(split('import')[1].split().head())


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-995b84194821> in <module>()
----> 1 df[0:10].apply(split('import')[1].split().head())

NameError: name 'split' is not defined

In [57]:
ans = [v.split('import')[1].split()[0].strip().split('.')[0].replace('\\n', '').replace('`', '').replace('"', '')
       for v in df]

In [58]:
ans = pd.Series(ans)

In [59]:
len(ans)


Out[59]:
20597891

In [60]:
ans = pd.Series(ans.unique())
ans


Out[60]:
0                                                   pymc
1                                                  numpy
2                                             matplotlib
3                                                seaborn
4                                                spacepy
5                                                  pymc,
6                                                 logit,
7                                       DiscreteUniform,
8                                                pprint,
9                                              datetime,
10                                              LogNorm,
11                                                  ant;
12                                               :before
13                                               </span>
14                                               theano,
15                                                 pymc3
16                                                theano
17                                              sklearn,
18                                             datasets,
19                                                scale,
20                                     train_test_split,
21                                            make_moons
22                                                  zip,
23                                               sklearn
24                                              datasets
25                                                 scale
26                                      train_test_split
27                                                   zip
28                                                Image,
29                                                  HTML
                             ...                        
148                                                  io,
149                                                   os
150                                                 glob
151                                                 from
152                                   FlickerLikelihood,
153                                            Wavelets,
154                                    LineFlickerModel,
155                                           pyspeckit,
156                                       savgol_filter,
157                                              Matplot
158                                            factorial
159                                               xarray
160                                           matnpotlib
161                                               pprint
162                                                data,
163                                              random,
164                                 multivariate_normal,
165                                              bisect,
166                                               bisect
167                                             warnings
168                                            invlogit,
169                                                plot,
170                                               zeros,
171                                                 pdb,
172                                              signal,
173                                              odeint,
174                                           constants,
175    _r__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mou...
176                               _r__\u001b[0;34m(self,
177                                 __\u001b[0;34m(self,
Length: 178, dtype: object

In [64]:
print(len(ans))
ans = ans[ans.str.contains("/") == False]
len(ans)


178
Out[64]:
176

In [65]:
print(len(ans))
ans = ans[ans.str.contains(r"\\") == False]
len(ans)


176
Out[65]:
172

In [66]:
ans = pd.Series(ans.unique())
ans


Out[66]:
0                      pymc
1                     numpy
2                matplotlib
3                   seaborn
4                   spacepy
5                     pymc,
6                    logit,
7          DiscreteUniform,
8                   pprint,
9                 datetime,
10                 LogNorm,
11                     ant;
12                  :before
13                  theano,
14                    pymc3
15                   theano
16                 sklearn,
17                datasets,
18                   scale,
19        train_test_split,
20               make_moons
21                     zip,
22                  sklearn
23                 datasets
24                    scale
25         train_test_split
26                      zip
27                   Image,
28                     HTML
29                    Image
               ...         
142     make_axes_locatable
143             namedtuple,
144                  antly,
145                     io,
146                      os
147                    glob
148                    from
149      FlickerLikelihood,
150               Wavelets,
151       LineFlickerModel,
152              pyspeckit,
153          savgol_filter,
154                 Matplot
155               factorial
156                  xarray
157              matnpotlib
158                  pprint
159                   data,
160                 random,
161    multivariate_normal,
162                 bisect,
163                  bisect
164                warnings
165               invlogit,
166                   plot,
167                  zeros,
168                    pdb,
169                 signal,
170                 odeint,
171              constants,
Length: 172, dtype: object

In [71]:
ans = pd.Series(ans.unique())
ans


Out[71]:
0                     pymc
1                    numpy
2               matplotlib
3                  seaborn
4                  spacepy
5                    logit
6          DiscreteUniform
7                   pprint
8                 datetime
9                  LogNorm
10                    ant;
11                 :before
12                  theano
13                   pymc3
14                 sklearn
15                datasets
16                   scale
17        train_test_split
18              make_moons
19                     zip
20                   Image
21                    HTML
22                  pandas
23                    tqdm
24                   stats
25                   scipy
26                   Model
27                  sample
28               traceplot
29            Interpolated
              ...         
104                  expit
105                 shared
106            OrderedDict
107                   chi2
108                getitem
109             GetMCAfile
110                 Cursor
111    make_axes_locatable
112             namedtuple
113                  antly
114                     io
115                   glob
116                   from
117      FlickerLikelihood
118               Wavelets
119       LineFlickerModel
120              pyspeckit
121          savgol_filter
122                Matplot
123              factorial
124                 xarray
125             matnpotlib
126                   data
127                 bisect
128               invlogit
129                  zeros
130                    pdb
131                 signal
132                 odeint
133              constants
Length: 134, dtype: object

In [70]:
ans = ans.apply(lambda x: x.replace(',', ''))

In [ ]:


In [73]:
with open('requirements.txt', 'w') as fp:
    for f in sorted(ans.values):
        fp.write('{}\n'.format(f))

In [ ]: